On cherche à étudier l’effet de trois facteurs sur le transcriptome des racines d’Arabidopsis thaliana. Le CO2, au cours des études préliminaires, s’est montré peu influent en conditions contrôles de fer et de nitrates, et accentué en cas de stress nutritionnel. Nous reprennons ces résultats avec des fonctions génériques et propres pour en faire le résumé et de jolis graphes.
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## number of items read is not a multiple of the number of columns
On a, pour chaque gène et chaque condition, son niveau d’expression en sortie de quantification. On labelle les conditions avec le code suivant : lettre majuscule pour le niveau fort, minuscule pour le niveau faible. Le réplicat est donné après l’underscore.
# quantification file
data <- read.csv("quantifFiles/QuantifGenesTomate.csv", h = T, sep = ",")
rownames(data) <- data$Gene
genes = which(!(grepl("__", rownames(data))))
not_quant = data[which((grepl("__", rownames(data)))), ]
data = data[genes, grepl("R", colnames(data))]
getLabel("R6")[1] "cnF_3"
[1] At_AmbientCO2_LowNitrate_Fe1
48 Levels: At_AmbientCO2_HighNitrate_Fe1 ... Sl_ElevatedCO2_LowNitrate_FeStarvation3
[1] "At_AmbientCO2_LowNitrate_Fe"
keep <- rowSums(data) >= 10
data <- data[keep, ]
group <- sapply(colnames(data), getLabel, with.rep = F)
colnames(data) <- sapply(colnames(data), getLabel)
head(data) Cnf_3 cNf_3 cNf_2 cnF_1 cnF_2 cnF_3 cNF_1 CNF_1 cNF_3 CNF_3 CNF_2
Sly00g0382751 400 320 259 280 464 370 228 321 282 348 261
Sly00g0382761 1326 755 910 1026 1429 1303 665 1030 771 1111 943
Sly00g0382831 0 1 0 0 1 0 0 0 1 4 0
cNF_2 CnF_3 cNf_1 CnF_2 CnF_1 Cnf_2 Cnf_1 CNf_2 CNf_3 cnf_2 cnf_1
Sly00g0382751 302 309 306 264 238 312 269 300 223 552 342
Sly00g0382761 882 1467 912 987 1017 1063 1146 825 791 1392 1000
Sly00g0382831 0 0 0 0 0 0 0 0 1 2 0
cnf_3 CNf_1
Sly00g0382751 401 216
Sly00g0382761 1049 687
Sly00g0382831 0 3
[ reached 'max' / getOption("max.print") -- omitted 3 rows ]
[1] 26984 24
On définit les conditions contrôle comme suit : CO2 ambiant et fort fer.
g = list()
method = "edger"
labels <- c("cNF", "cNf")
genes1 <- dualDE(data, labels, pval = 0.01, method = method) (Intercept) groupcNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_1" "cNF_3" "cNF_2" "cNf_3" "cNf_2" "cNf_1"
cNF_1 cNF_3 cNF_2 cNf_3 cNf_2 cNf_1
0.9294472 1.0049087 1.0169459 1.0356111 0.9974179 1.0156693
[1] "1210 genes DE"
[1] "839not in the Micro-Tom annotation..."
(Intercept) groupCNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNF_1" "CNF_3" "CNF_2" "CNf_2" "CNf_3" "CNf_1"
CNF_1 CNF_3 CNF_2 CNf_2 CNf_3 CNf_1
1.0313629 1.0282183 1.0297707 0.9702401 0.9896492 0.9507588
[1] "2625 genes DE"
[1] "839not in the Micro-Tom annotation..."
(Intercept) groupcnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cnF_1" "cnF_2" "cnF_3" "cnf_2" "cnf_1" "cnf_3"
cnF_1 cnF_2 cnF_3 cnf_2 cnf_1 cnf_3
1.0150039 1.0334623 1.0043332 0.9725584 1.0065616 0.9680806
[1] "1538 genes DE"
[1] "839not in the Micro-Tom annotation..."
(Intercept) groupCnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CnF_3" "CnF_2" "CnF_1" "Cnf_3" "Cnf_2" "Cnf_1"
CnF_3 CnF_2 CnF_1 Cnf_3 Cnf_2 Cnf_1
1.0393293 1.0360781 1.0266132 1.0139909 0.9444943 0.9394942
[1] "2379 genes DE"
[1] "839not in the Micro-Tom annotation..."
On visualise les gènes différentiellement exprimés en commun entre les différents niveaux des autres facteurs.
library(ggVennDiagram)
library(VennDiagram)
gene_list <- list()
for (comp in names(g)) {
print(g[[comp]])
gene_list[[comp]] <- g[[comp]]$gene_id
} gene_id a.value m.value p.value q.value rank estimatedDEG
1 Sly12g0054311 6.371697 5.535842 1.301150e-78 3.511022e-74 1 1
2 Sly04g0236861 7.433642 4.887155 3.282440e-71 4.428668e-67 2 1
3 Sly12g0050491 6.742858 9.335343 3.086687e-65 2.776372e-61 3 1
4 Sly12g0050531 7.604185 7.102317 5.540137e-61 3.737377e-57 4 1
5 Sly10g0172431 5.490205 7.591687 1.760934e-59 9.503406e-56 5 1
6 Sly12g0050521 6.902366 8.505644 3.000099e-50 1.349244e-46 6 1
7 Sly12g0050511 4.881225 11.028206 1.303417e-47 5.024486e-44 7 1
8 Sly07g0120551 -2.695178 11.152439 2.095657e-47 7.068649e-44 8 1
9 Sly07g0120541 -2.695178 12.211927 1.640548e-46 4.918728e-43 9 1
upreg
1 1
2 1
3 1
4 1
5 1
6 1
7 1
8 1
9 1
[ reached 'max' / getOption("max.print") -- omitted 1201 rows ]
gene_id a.value m.value p.value q.value rank
1 Sly07g0120541 -2.648248 13.209291 2.840421e-261 7.664593e-257 1
2 Sly12g0050501 6.517238 9.264109 1.182771e-254 1.595795e-250 2
3 Sly12g0050491 7.821594 7.815886 3.595399e-201 3.233942e-197 3
4 Sly12g0050511 6.398623 8.099531 3.090267e-192 2.084694e-188 4
5 Sly04g0249251 9.871546 5.829147 1.647612e-177 8.891832e-174 5
6 Sly07g0120551 4.155245 11.696361 1.392145e-171 6.260941e-168 6
7 Sly12g0050521 7.611793 7.094212 1.152153e-145 4.441385e-142 7
8 Sly07g0126221 8.333543 4.942850 2.700637e-138 9.109249e-135 8
9 Sly02g0349361 8.748709 5.907246 1.087525e-135 3.260642e-132 9
estimatedDEG upreg
1 1 1
2 1 1
3 1 1
4 1 1
5 1 1
6 1 1
7 1 1
8 1 1
9 1 1
[ reached 'max' / getOption("max.print") -- omitted 2616 rows ]
gene_id a.value m.value p.value q.value rank
1 Sly02g0328831 8.237194 7.565371 1.567035e-173 4.228489e-169 1
2 Sly11g0295831 12.015647 5.781858 4.143425e-135 5.590309e-131 2
3 Sly11g0302271 10.563145 8.216415 4.005794e-133 3.603078e-129 3
4 Sly12g0056631 10.976488 3.882693 4.902943e-105 3.307525e-101 4
5 Sly03g0192971 5.323233 8.560640 5.311861e-99 2.866705e-95 5
6 Sly12g0056731 10.044437 4.586915 5.725045e-97 2.574743e-93 6
7 Sly04g0251231 10.722988 3.718024 1.845001e-91 7.112215e-88 7
8 Sly06g0370891 10.827372 3.157659 3.832720e-81 1.292776e-77 8
9 Sly01g0018271 5.201038 8.194842 1.274944e-79 3.822565e-76 9
estimatedDEG upreg
1 1 1
2 1 1
3 1 1
4 1 1
5 1 1
6 1 1
7 1 1
8 1 1
9 1 1
[ reached 'max' / getOption("max.print") -- omitted 1529 rows ]
gene_id a.value m.value p.value q.value rank
1 Sly04g0255661 8.647329 3.817894 2.510523e-106 6.774396e-102 1
2 Sly12g0052261 9.792490 4.308032 1.787756e-101 2.412040e-97 2
3 Sly03g0227341 9.004696 3.832742 5.780724e-100 5.199569e-96 3
4 Sly09g0080061 8.601007 -3.257998 7.322129e-95 4.939508e-91 4
5 Sly02g0324781 12.987983 3.348579 7.155948e-87 3.861922e-83 5
6 Sly08g0265821 5.523827 6.379337 1.244827e-82 5.598403e-79 6
7 Sly02g0325141 7.298929 3.892792 5.541770e-70 2.136273e-66 7
8 Sly03g0212611 11.983296 3.399000 1.034259e-69 3.488555e-66 8
9 Sly08g0282941 9.517124 2.702746 4.384115e-69 1.314455e-65 9
estimatedDEG upreg
1 1 1
2 1 1
3 1 1
4 1 0
5 1 1
6 1 1
7 1 1
8 1 1
9 1 1
[ reached 'max' / getOption("max.print") -- omitted 2370 rows ]
partitions <- get.venn.partitions(gene_list, keep.elements = T)
partitions$shared <- rowSums(partitions[1:4])
partitions <- partitions[order(-partitions$shared), ]
# common_genes <- unlist(partitions[1, '..values..']) results <- getBM( filters =
# 'ensembl_gene_id', attributes = c('ensembl_gene_id', 'description',
# 'external_gene_name', 'entrezgene_id'), values = common_genes, mart = mart)
# results <- results[!rownames(results) %in%
# which(duplicated(results$ensembl_gene_id)), ] kable(results)
sharedBy3 <- unique(unlist(subset(partitions, partitions$shared == 3)$..values..))
a <- OntologyProfile(sharedBy3, specie = specie)[1] "839not in the Micro-Tom annotation..."
d <- data.frame(matrix(ncol = 4, nrow = 2))
colnames(d) <- names(g)
row.names(d) <- c("up", "down")
for (comp in names(g)) {
d["up", comp] <- sum(g[[comp]]$upreg == 1)
d["down", comp] <- sum(g[[comp]]$upreg == 0)
}
res <- melt(d)
res$reg = rep(c("up", "down"), 4)
genesIronAt <- res
save(genesIronAt, file = "genesIronAt.RData")
ggplot(res, aes(fill = reg, y = value, x = variable)) + geom_bar(position = "stack",
stat = "identity", alpha = 0.5, color = "black") + ggtitle("Iron effet on gene regulation") +
xlab("") + ylab("Number of differentially expressed genes") + coord_flip()